*************************************************************************************************************************************************
* Table 1: computing each reported value
* Summary statistics for US immigrants by gender (male share)
cd "C:\Users\your_directory"
use "acs1970-2015.dta", clear

* Sample restriction: ages 18-64 and educ code of 6 or lower
keep if inrange(age, 18, 64)
keep if educ<=6

* foreign is 1 when bpl is above 100 and citizen is above 1, and 0 otherwise
generate foreign = (bpl>100 & citizen>1)
replace bpld = 1 if foreign==0

* perwt-weighted male share for each foreign x year cell, then one column per year
generate male = (sex==1)
collapse (mean) male [fweight=perwt], by(foreign year)
reshape wide male, i(foreign) j(year)
list foreign male*

* Female share: same sample and foreign definition as the male-share block, reloaded from disk
use "acs1970-2015.dta", clear
keep if inrange(age, 18, 64)
keep if educ<=6

* foreign is 1 when bpl is above 100 and citizen is above 1, and 0 otherwise
generate foreign = (bpl>100 & citizen>1)
replace bpld = 1 if foreign==0

* perwt-weighted female share for each foreign x year cell, then one column per year
generate female = (sex==2)
collapse (mean) female [fweight=perwt], by(foreign year)
reshape wide female, i(foreign) j(year)
list foreign female*

* Summary statistics for age cohort: shares aged 18-33, 34-49 and 50+

use "acs1970-2015.dta", clear
keep if inrange(age, 18, 64)
keep if educ<=6

* foreign is 1 when bpl is above 100 and citizen is above 1, and 0 otherwise
generate foreign = (bpl>100 & citizen>1)
replace bpld = 1 if foreign==0

* Three mutually exclusive age-band indicators (the sample is already limited to 18-64)
generate a1 = inrange(age, 18, 33)
generate a2 = inrange(age, 34, 49)
generate a3 = (age>=50)

* perwt-weighted share in each band by foreign x year, then one column per band-year
collapse (mean) a1 a2 a3 [fweight=perwt], by(foreign year)
reshape wide a1 a2 a3, i(foreign) j(year)
list foreign a1* a2* a3*

* Summary statistics for years of schooling: shares with educ codes 0-2, 3-5 and 6
use "acs1970-2015.dta", clear
keep if inrange(age, 18, 64)
keep if educ<=6

* foreign is 1 when bpl is above 100 and citizen is above 1, and 0 otherwise
generate foreign = (bpl>100 & citizen>1)
replace bpld = 1 if foreign==0

* Three mutually exclusive schooling indicators built from the educ code
generate e1 = inrange(educ, 0, 2)
generate e2 = inrange(educ, 3, 5)
generate e3 = (educ==6)

* perwt-weighted share in each schooling group by foreign x year, then one column per group-year
collapse (mean) e1 e2 e3 [fweight=perwt], by(foreign year)
reshape wide e1 e2 e3, i(foreign) j(year)
list foreign e1* e2* e3*

* Summary statistics for industry labor share:
* share of each industry-year's weighted labor force made up of foreign==1 workers with educ<=6
use "acs1970-2015.dta", clear
keep if age>=18 & age<=64

* Coarse industry label built from the ind1950 codes; every code not listed below stays "others"
ren ind1950 ind
gen industry="others"
replace industry="agriculture" if ind==105
replace industry="construction" if ind==246
replace industry="nondurable manu" if ind>=406 & ind<=489
replace industry="eat and drink" if ind==679
replace industry="personal service" if ind>=826 & ind<=849

* This block uses bpl>=150, whereas the Table 1 demographic blocks above use bpl>100
gen foreign=0
replace foreign=1 if bpl>=150 & citizen>1
replace bpld=1 if foreign==0

* NOTE(review): lab is not a full variable name that appears elsewhere in this file;
* presumably it is an abbreviation of the labor-force status variable -- confirm against the data
keep if lab==2
drop if empstat==0 | empstat==3

* Observations with empstat==2 are moved into their own "unemploy" category
gen unemp=1*(empstat==2)
replace industry="unemploy" if unemp==1

* Only foreign observations with educ<=6 keep foreign==1; all others fall into the foreign==0 group
gen a=1*(educ<=6)
replace foreign=foreign*a 

* Sum of person weights in each year x foreign x industry cell
collapse (rawsum)perwt, by (year foreign industry)

* The collapse keeps only year, foreign, industry and perwt, so the numeric ind variable is gone.
* The original code kept writing "ind", which only worked as an abbreviation of industry;
* the full name is used here so the block also runs with -set varabbrev off-.
* egen total() replaces the older undocumented sum() name for the same group total.
bysort year industry: egen to=total(perwt)
gen share=perwt/to
keep if foreign==1
sort year industry
list year industry share

*************************************************************************************************************************************************
* Table 2: computing each reported value
* Years of residence in the US 
use "acs1970-2015.dta", clear
keep if age>=18 & age<=64
keep if educ<=6
gen foreign=0
replace foreign=1 if bpl>100 & citizen>1
replace bpld=1 if foreign==0
keep if foreign==1

* Years elapsed between the immigration year and the survey year
* NOTE(review): if yrimmig uses a numeric not-available code (for example 0), yr will be
* implausibly large for those rows; inspect the summary below before trusting the 11+ share
gen yr=year-yrimmig
sum yr, de

* Stata treats missing as larger than any number, so an unguarded (yr>=11) would count
* observations with missing yr as 11+ years of residence. All three indicators are set to
* missing when yr is missing, so those rows drop out of every weighted mean.
gen y1=1*(yr>=0 & yr<=5) if !missing(yr)
gen y2=1*(yr>=6 & yr<=10) if !missing(yr)
gen y3=1*(yr>=11) if !missing(yr)

* perwt-weighted share in each residence band by year, then one column per band-year
collapse (mean)y1 y2 y3 [fw=perwt], by (foreign year)
reshape wide y1 y2 y3, i(foreign) j(year)
list foreign y1* y2* y3* 

* Age of arrival in the US: shares arriving at ages 0-14, 15-25 and 26-64
use "acs1970-2015.dta", clear
keep if inrange(age, 18, 64)
keep if educ<=6

* foreign is 1 when bpl is above 100 and citizen is above 1; only those rows are kept
generate foreign = (bpl>100 & citizen>1)
replace bpld = 1 if foreign==0
keep if foreign==1

* Age at arrival = current age minus the years elapsed since yrimmig
generate agear = age - (year - yrimmig)

* Arrival-age bands; values outside 0-64 (or missing) fall in none of the three
generate y1 = inrange(agear, 0, 14)
generate y2 = inrange(agear, 15, 25)
generate y3 = inrange(agear, 26, 64)

* perwt-weighted share in each band by year, then one column per band-year
collapse (mean) y1 y2 y3 [fweight=perwt], by(foreign year)
reshape wide y1 y2 y3, i(foreign) j(year)
list foreign y1* y2* y3*

* Immigration share by national origin, conditional on less than high school education 
* (sample: ages 18-64 with educ<=5)

use "acs1970-2015.dta", clear
keep if age>=18 & age<=64
keep if educ<=5
gen foreign=0
replace foreign=1 if bpl>=150 & citizen>1
replace bpld=1 if foreign==0

* Origin-group indicators from ranges of the detailed birthplace code bpld.
* Each is 1 only for foreign==1 observations; the ranges below do not overlap.
gen mex=1*(bpld==20000 & foreign==1)
gen ca=1*(bpld>=21000 & bpld<=21090 & foreign==1)
gen car=1*(bpld>=25000 & bpld<=27000 & foreign==1)
gen sa=1*(bpld>=30000 & bpld<=31000 & foreign==1)
gen sea=1*(bpld>=51000 & bpld<=51990 & foreign==1)
gen oa=1*(bpld>=50000 & bpld<=50900 & foreign==1) + 1*(bpld>=52000 & bpld<=52400 & foreign==1)
gen af=1*(bpld>=60000 & bpld<=61000 & foreign==1)
gen me=1*(bpld>=53000 & bpld<=59900 & foreign==1)
gen eu=1*(bpld>=40000 & bpld<=49900 & foreign==1)

* t is 1 when the observation falls in one of the groups above; other is the residual category
gen t=mex+ca+car+sa+sea+oa+af+me+eu
gen other=1
replace other=0 if t==1

* The variables are named explicitly instead of using the mex-other range, so the result
* does not depend on the order in which the indicators were created.
collapse (mean) mex ca car sa sea oa af me eu t other [fw=perwt], by (foreign year)

* The original wildcard list (ca* car* me* ...) printed car and mex twice because ca* also
* matches car and me* also matches mex, and it left out foreign, so the foreign==0 and
* foreign==1 rows could not be told apart. Each column is now listed once, with foreign shown.
list foreign year mex ca car sa sea oa af me eu other

* Immigration share by national origin, conditional on high school education or less
* (sample: ages 18-64 with educ<=6)

use "acs1970-2015.dta", clear
keep if age>=18 & age<=64
keep if educ<=6
gen foreign=0
replace foreign=1 if bpl>=150 & citizen>1
replace bpld=1 if foreign==0

* Origin-group indicators from ranges of the detailed birthplace code bpld.
* Each is 1 only for foreign==1 observations; the ranges below do not overlap.
gen mex=1*(bpld==20000 & foreign==1)
gen ca=1*(bpld>=21000 & bpld<=21090 & foreign==1)
gen car=1*(bpld>=25000 & bpld<=27000 & foreign==1)
gen sa=1*(bpld>=30000 & bpld<=31000 & foreign==1)
gen sea=1*(bpld>=51000 & bpld<=51990 & foreign==1)
gen oa=1*(bpld>=50000 & bpld<=50900 & foreign==1) + 1*(bpld>=52000 & bpld<=52400 & foreign==1)
gen af=1*(bpld>=60000 & bpld<=61000 & foreign==1)
gen me=1*(bpld>=53000 & bpld<=59900 & foreign==1)
gen eu=1*(bpld>=40000 & bpld<=49900 & foreign==1)

* t is 1 when the observation falls in one of the groups above; other is the residual category
gen t=mex+ca+car+sa+sea+oa+af+me+eu
gen other=1
replace other=0 if t==1

* The variables are named explicitly instead of using the mex-other range, so the result
* does not depend on the order in which the indicators were created.
collapse (mean) mex ca car sa sea oa af me eu t other [fw=perwt], by (foreign year)

* The original wildcard list (ca* car* me* ...) printed car and mex twice because ca* also
* matches car and me* also matches mex, and it left out foreign, so the foreign==0 and
* foreign==1 rows could not be told apart. Each column is now listed once, with foreign shown.
list foreign year mex ca car sa sea oa af me eu other

*************************************************************************************************************************************************
* Table 3: computing each reported value
* Share of each industry-year's weighted labor force made up of foreign==1 workers with educ<=6

use "acs1970-2015.dta", clear

keep if age>=18 & age<=64

* Coarse industry label built from the ind1950 codes; every code not listed below stays "others"
ren ind1950 ind
gen industry="others"
replace industry="agriculture" if ind==105
replace industry="construction" if ind==246
replace industry="nondurable manu" if ind>=406 & ind<=489
replace industry="eat and drink" if ind==679
replace industry="personal service" if ind>=826 & ind<=849

gen foreign=0
replace foreign=1 if bpl>=150 & citizen>1
replace bpld=1 if foreign==0

* NOTE(review): lab is not a full variable name that appears elsewhere in this file;
* presumably it is an abbreviation of the labor-force status variable -- confirm against the data
keep if lab==2
drop if empstat==0 | empstat==3

* Observations with empstat==2 are moved into their own "unemploy" category
gen unemp=1*(empstat==2)
replace industry="unemploy" if unemp==1

* Only foreign observations with educ<=6 keep foreign==1; all others fall into the foreign==0 group
gen a=1*(educ<=6)
replace foreign=foreign*a 

* Sum of person weights in each year x foreign x industry cell
collapse (rawsum)perwt, by (year foreign industry)

* The collapse keeps only year, foreign, industry and perwt, so the numeric ind variable is gone.
* The original code kept writing "ind", which only worked as an abbreviation of industry;
* the full name is used here so the block also runs with -set varabbrev off-.
* egen total() replaces the older undocumented sum() name for the same group total.
bysort year industry: egen to=total(perwt)
gen share=perwt/to
keep if foreign==1
sort year industry
list year industry share

*************************************************************************************************************************************************
*Producing Table 4  

#delimit;
clear;
set matsize 800;
set memory 200m;
set more off;

/*Machine-specific absolute paths: edit these globals before running on another computer.*/
global dir2 "C:\Users\ctmcintosh\Documents\My papers\Mexican Fertility\";
global dropbox_base "C:\Users\ctmcintosh\Dropbox\Migration_JEP\";
global dropbox_dir "$dropbox_base\Brookings\Analysis\data\ReStat_2010\";
global dir "$dir2\Jan07\";
global dir3 "$dir2\Oct06\";
global temp "$dir2";

/*List of enforcement-effort variable names.
  The terminating semicolon below was missing in the original. Under #delimit ; a command runs
  until the next semicolon, so the -global- statement absorbed the following -use- command
  and the master data set was never loaded.*/
global effort "lwapp tapp bapp1 bapp2 bapp3 bapp4 bapp5 bapp6 bapp7 lwenf lwenf_full prdayhours nprdayhours total_program_officer_hours 
	border_enforcement_hours patrol_border_hours patrol_interior_hours";

/*START MAIN DATASET ASSEMBLY HERE:*/
/*Load the Mexican data and merge in the US data by cohort (birthyr), gender and census year:*/
use "$dropbox_dir\mex1960-2015.dta";
sort sex birthyr year;
merge m:1 sex birthyr year using "$dropbox_dir\us1960-2015.dta";
/*The 1980 census year is excluded from the analysis data.*/
drop if year==1980;
drop _merge;
gen age = year-birthyr;
sort statemx;


/*The four active merges below use the old-style syntax (merge varlist using file), which
  relies on the master data being sorted on the key variables first, hence the sort before
  each one. _merge is dropped after every merge so the next merge can create it again.
  NOTE(review): the using files are presumably already sorted on the same keys -- confirm.*/

/*Disabled: fertility averages merge, kept for reference.*/
/*
/*Bringing in average children ever born for Women aged 30-60 by state & census year:*/
sort statemx year;
merge statemx year using $temp\fertilityaverages;
drop _merge;
*/

/*Bringing in the gdppc data for Mexican states & the US; first merging a single variable lgdprat16 which gives the 
	log GDP ratio in the year where a cohort turned 16:*/
/*Key: Mexican state x birth year.*/
sort statemx birthyr;
merge statemx birthyr using "$temp\mex_populationgdp1";
drop _merge;

/*Then bringing in the log gdp ratio for each state in each year for which a census was performed:*/
/*Key: Mexican state x census year.*/
sort statemx year;
merge statemx year using  "$temp\mex_populationgdp2";
drop _merge;



/*Bringing in the data on share of gdp in agriculture in the year of birth of a cohort:*/
/*Key: Mexican state x birth year.*/
sort statemx birthyr;
merge statemx birthyr using  "$dir\agshare1";
drop _merge;

/*Bringing in the data on share of gdp in agriculture in the year in which a cohort turns 16:*/
/*Key: Mexican state x birth year.*/
sort statemx birthyr;
merge statemx birthyr using  "$dir\agshare2";
drop _merge;

/*PREPARING THE MIGRATION DATA:*/
/*Sample selection, upper bound only: cohorts older than 50 are removed here.
  The lower age bound (age>=16) is applied further down in this file.*/
keep if age<=50;

/*State/cohort/gender ID for within-group fixed effects.
  Digits read as sex, then statemx, then birthyr:*/
generate cohortid1 = (sex*1000000) + (statemx*10000) + birthyr;


/*GDP innovations by ten-year age cell (16-25, 26-35, 36-45).
  innov116 spreads the 16-25 innovation to every row of the cohort and must be computed
  before the missing values of innov16 are replaced with zero.*/
generate innov16 = lgdpinnov if age>=16 & age<26;
egen innov116 = max(innov16), by(cohortid1);
replace innov16 = 0 if innov16==.;

/*Same construction for the 26-35 and 36-45 cells: innov26 and innov36.*/
forvalues k = 1/2 {;
	local lo = 16 + 10*`k';
	local hi = `lo' + 10;
	generate innov`lo' = lgdpinnov if age>=`lo' & age<`hi';
	replace innov`lo' = 0 if innov`lo'==.;
	};

/*The cohort's 16-25 innovation, switched on only in the 26-35 cell (lag1) or the 36-45 cell (lag2).*/
forvalues k = 1/2 {;
	local lo = 16 + 10*`k';
	local hi = `lo' + 10;
	generate innov16lag`k' = innov116 if age>=`lo' & age<`hi';
	replace innov16lag`k' = 0 if innov16lag`k'==.;
	};

/*First innovation seen for the cohort: the 16-25 value when available, otherwise the
  current innovation for rows in the 26-35 cell.*/
generate firstinnov = innov116;
replace firstinnov = lgdpinnov if firstinnov==. & age>=26 & age<36;

/******************************************************************
	Aggregating birth years into alternating three-year and two-year cohorts.
	Around each anchor year x (every fifth year from 1920 to 2015), birth years x-1, x and x+1
	are labelled x, and birth years x+2 and x+3 are labelled x+2.
	Example: 1969,70,71 -> 1970, then 72,73 -> 1972, then 74,75,76 -> 1975, then 77,78 -> 1977.*/
generate birthyrag = .;
forvalues x = 1920(5)2015 {;
	replace birthyrag = `x' if inlist(birthyr, `x'-1, `x', `x'+1);
	replace birthyrag = `x'+2 if inlist(birthyr, `x'+2, `x'+3);
	};

/*Switch this off to get one-year birth cohorts.
  Counts are summed and the covariates averaged within aggregated cohort x sex x state x census year.
  Only the variables named here survive the collapse; in particular birthyr itself is dropped,
  so any later reference to birthyr resolves to birthyrag through Stata's variable abbreviation.*/
collapse
	(sum) sob  nat nathsdo naths12
	(mean) age  lpcgdpus lgdprat_hat lpcgdpmxs lgdprat16_hat lgdprat16 lgdpinnov lgdpinnov_hat
		innov16 innov26 innov36 agshr16 lgdpratdec lgdpratdchg,
	by(birthyrag sex statemx year);

/*Ten-year birthyear cohorts: decade x covers birth years x-4 through x+5.*/
generate birthag = .;
forvalues x = 1910(10)2010 {;
	replace birthag = `x' if birthyr>(`x'-5) & birthyr<=(`x'+5);
	};


/*RESUME DATA PREP:*/

/*base flags the first census year in which a cohort is observed at an age above 4.
  (In the aggregated files the base has to be taken above that age because otherwise the
  base cohort would be counted at a time when some of its members had not been born yet.)*/
egen minyr = min(year) if age>4, by(birthyr sex statemx);
generate base = (year==minyr);
drop minyr;

/*sobbase: the base-year cohort size, copied to every row of the cohort.*/
generate sobbase1 = sob if base==1;
egen sobbase = mean(sobbase1), by(birthyr sex statemx);
generate migrants = sobbase - sob;

/*migperc is the share of the base cohort no longer present, defined only for cohorts
  older than fifteen and outside the base year, with the 2.5% tails trimmed within census year.*/
generate migperc = ((sobbase-sob)/sobbase);
replace migperc = . if base==1 | age<=15;

/*TRIMMING: values beyond the 2.5th / 97.5th percentile of their census year are set to that percentile.*/
egen trim_hi = pctile(migperc), p(97.5) by(year);
egen trim_lo = pctile(migperc), p(2.5) by(year);
replace migperc = trim_hi if migperc>trim_hi & migperc!=.;
replace migperc = trim_lo if migperc<trim_lo & migperc!=.;
drop trim_hi trim_lo;

/*The base value for US nat is taken in the cohort's base year, which will be 1960 for earlier
cohorts and will be cohorts aged 1-10 for later cohorts.
  natratb: base-year ratio of Mexican to US cohort size; lnatb: log US cohort size in the base year.*/
generate natratbase1 = sobbase/nat if base==1;
egen natratb = mean(natratbase1), by(birthyr sex statemx);
generate lnatb1 = ln(nat) if base==1;
egen lnatb = mean(lnatb1), by(birthyr sex statemx);
generate lsobbase = ln(sobbase);

/*We can't use hsdo and hs12 numbers for those aged 20 or less as there is too much skill upgrading going on.  But we don't want
to lose these observations.  So what this code does is to replace the cohort size observation for these cells with the cohort
observed in the following census when the skill upgrading has taken place.*/
/*The [_n+1] lookup below depends on this sort order: within state, sex and birth cohort, the
  next row is the next census year. Note that only birthyr is compared with the next row.*/
sort statemx sex birthyr year ;
foreach X in nathsdo naths12 {;
	/*Blank out the schooling-specific counts for everyone under 20...*/
	replace `X'=. if age<20;
	/*...then, for ages 16-19, borrow the value from the cohort's next row.*/
	replace `X'=`X'[_n+1] if age>15 & age<20 & birthyr==birthyr[_n+1];
	};

/*lnatrat is the log of (Mex base/nat US current),  lnatratbase is log of (Mex base/nat US base)
(the original note about the smaller number of observations was left unfinished here)*/
/*Creates natrat, nathsdorat, naths12rat and their logs lnatrat, lnathsdorat, lnaths12rat.*/
foreach X in nat nathsdo naths12{;
	gen `X'rat=sobbase/`X';
	gen l`X'rat=ln(`X'rat);
	};

/*The base value for US hsdo and hs12 is the first time you see schooling outcomes, which will be
the first time that you see the cohort aged 18 or older in the data.*/
foreach X in nathsdo naths12 {;
	/*First census year in which the ratio is non-missing for the cohort.*/
	egen `X'ratb1=min(year) if `X'rat!=., by(birthyr sex statemx);
	/*The ratio in that year, then copied to every row of the cohort.*/
	gen `X'ratb2=`X'rat if year==`X'ratb1;
	egen `X'ratb=mean(`X'ratb2), by(birthyr sex statemx);
	};
/*Logs of the base ratios; natratb itself was created earlier in this file.*/
foreach X in nat nathsdo naths12 {;
	gen l`X'ratb=ln(`X'ratb);
	};

/*Growth of the MX sob base cohorts and the US nat base cohorts, measured as ratios of logs
  between neighbouring rows. Rows are ordered by state, sex, census year and cohort, and a
  neighbour is used only when it belongs to the same census year.*/
sort statemx sex year birthyr;
generate lbaseprev = lsobbase[_n-1] if year[_n-1]==year;
generate lbasenext = lsobbase[_n+1] if year[_n+1]==year;
generate lnatprev = lnatb[_n-1] if year[_n-1]==year;
generate lnatnext = lnatb[_n+1] if year[_n+1]==year;
generate mxthisoverlast = lsobbase/lbaseprev;
generate mxnextoverthis = lbasenext/lsobbase;
generate usthisoverlast = lnatb/lnatprev;
generate usnextoverthis = lnatnext/lnatb;

/*The same neighbour comparison for the log MX/US base ratio:*/
sort statemx sex year birthyr;
generate lnatratbprev = lnatratb[_n-1] if year[_n-1]==year;
generate lnatratbnext = lnatratb[_n+1] if year[_n+1]==year;
generate lratlastoverthis = lnatratbprev/lnatratb;
generate lratnextoverthis = lnatratbnext/lnatratb;

/*Log ratio of the state's base cohort size to the total Mexican base cohort size
  (total taken over base-year rows of the same cohort and sex):*/
egen mextot = total(sobbase1), by(birthyr sex);
generate mexrat = sobbase/mextot;
generate lmexrat = ln(mexrat);

/*CALCULATING THE FIRST DIFFERENCES OF MIGPERC = DMIGPERC:*/
/*migperc2 is the untrimmed migration share; it is set to zero for cohorts aged 15 or less
  whose migperc is missing, so that the first difference after age 15 has a starting value.*/
generate migperc2 = ((sobbase-sob)/sobbase);
replace migperc2 = 0 if age<=15 & migperc==.;

/*Change since the cohort's previous census row (same state, sex and birth cohort), ages above 15 only.*/
bysort statemx sex birthyr (year): generate dmigperc = migperc2 - migperc2[_n-1] if _n>1 & age>15;

/*Dividing the 1990 change in migration in half because it covers two decades:*/
replace dmigperc = dmigperc/2 if year==1990;
/*Doubling the 2015 change in migration because it refers to only half a decade:*/
replace dmigperc = dmigperc*2 if year==2015;

/*Trim dmigperc at the 2.5th and 97.5th percentiles of its census year.*/
egen dtrim_hi = pctile(dmigperc), p(97.5) by(year);
egen dtrim_lo = pctile(dmigperc), p(2.5) by(year);
replace dmigperc = dtrim_hi if dmigperc>dtrim_hi & dmigperc!=.;
replace dmigperc = dtrim_lo if dmigperc<dtrim_lo & dmigperc!=.;
drop dtrim_hi dtrim_lo;
replace dmigperc = . if age<=15;

/*Labels for the cohort-size ratio variables.*/
label variable lnatrat "log(Mex State base/nat US current)";
label variable lnatratb "log(Mex State base/nat US base)";
label variable lnathsdorat "log(Mex State base/nat US hsdo current)";
label variable lnathsdoratb "log(Mex State base/nat US hsdo base)";
label variable lnaths12rat "log(Mex State base/nat US hs12 current)";
label variable lnaths12ratb "log(Mex State base/nat US hs12 base)";

/*generating a state/cohort/gender specific ID which can be used to run FE within a specific group:
read this variable as sex, statemx, birthyr:*/
gen cohortid=(sex*1000000) + (statemx*10000) + birthyr;

/*generating a set of age decade dummies:*/
gen agdec=1 if age>=0 & age<10;
replace agdec=2 if age>=10 & age<20;
replace agdec=3 if age>=20 & age<30;
replace agdec=4 if age>=30 & age<40;
replace agdec=5 if age>=40 & age<=50;

/*The demeaned number of years since a cohort turned 16 (missing before age 16):*/
gen yrs16=age-16;
replace yrs16=. if yrs16<0;
egen yrs16m=mean(yrs16);
gen yrs16dm=yrs16-yrs16m;

/*Logs of cohort sizes and their interactions with the demeaned years-since-16 term.*/
gen lnat=log(nat);
gen lnaths12=log(naths12);
gen lnathsdo=log(nathsdo);
*gen lustot=log(ustot);
gen lnatint=lnat*yrs16dm;
gen lmextot=log(mextot);
gen lmexint=lmextot*yrs16dm;
gen yrs16sq=yrs16^2;
gen lsobbaseint=lsobbase*yrs16dm;
gen lgdprat16int=lgdprat16*yrs16dm;
gen lnatrat16int=lnatrat*yrs16dm;
gen lnathsdorat16int=lnathsdorat*yrs16dm;

/*The base value of the US cohort, copied to every row of the cohort.
  The original wrote max(lnatbase), i.e. it referenced the variable being created; that only
  ran because lnatbase was resolved as an abbreviation of lnatbase1. The source variable is
  now named explicitly, so the line no longer depends on variable abbreviation.*/
gen lnatbase1=lnat if base==1;
egen lnatbase=max(lnatbase1), by(cohortid);

/*Indicators for the top and bottom thirds of migrt24 and of dist_tren (66th / 33rd percentiles).
  NOTE(review): migrt24 and dist_tren are not among the variables kept by the cohort collapse
  earlier in this file, so these lines will stop with "variable not found" unless that collapse
  is switched off or the variables are added to it -- confirm the intended source.*/
egen m24h=pctile(migrt24), p(66);
gen m24high=0;  replace m24high=1 if migrt24>=m24h & migrt24!=.;
egen m24l=pctile(migrt24), p(33);
gen m24low=0;  replace m24low=1 if migrt24<=m24l;

egen distl=pctile(dist_tren), p(33);
gen distlow=0;  replace distlow=1 if dist_tren<=distl;
egen disth=pctile(dist_tren), p(66);
/*Unlike m24high, disthigh has no missing-value guard, so a missing dist_tren counts as high.*/
gen disthigh=0;  replace disthigh=1 if dist_tren>=disth;
/*Disabled: alternative construction of a base log GDP ratio and age-cell lags, kept for reference.*/
/*
/*Calculating the value of the log gdp ratio in the first time you see a cohort after they turn 16, and then keep that value.*/
gen lgdpratbase=lgdprat16;
replace lgdpratbase=lgdpratdec if base==1 & lgdpratbase==.;
egen lgdprat1base=max(lgdpratbase), by(cohortid);
replace lgdpratbase=lgdprat1base if lgdpratbase==.;

gen lgdp16lag0=lgdprat16 if age>=16 & age<26;
replace lgdp16lag0=0 if lgdp16lag0==.;
gen lgdp16lag1=lgdprat16 if age>=26 & age<36;
replace lgdp16lag1=0 if lgdp16lag1==.  & age>36 & age!=.;
gen lgdp16lag2=lgdprat16 if age>=36 & age<46;
replace lgdp16lag2=0 if lgdp16lag2==. & age<46 & age!=.;
*/

/*Blank out the migration change for the aggregated 1970 birth cohort in the 1990 census.
  NOTE(review): the reason is not stated in this file -- confirm with the authors.*/
replace dmigperc=. if year==1990 & birthyrag==1970; 

/*baseage: the cohort's age in its base year, copied to every row of the cohort.*/
gen baseage1=age if base==1;
egen baseage=mean(baseage1), by(cohortid);

/*Disabled: variable list once used to trim the data set (a star comment, ended by the semicolon).*/
*keep statemx birthyr birthag year age agdec sex baseage nat nathsdo dmigperc lnathsdorat lgdprat16 lgdpratdchg sob migrt24 lgdpinnov dist_tren distfron lnatrat yrs16sq yrs16dm migperc sobbase cohortid lsobbase birthyrag innov* lg;

/*Variable labels for the analysis data set.*/
label variable statemx "Mexican State";
label variable birthyr "Birth year";
label variable birthag "Aggregated birthyear";
label variable year "Census year";
label variable age "Age of cohort at time of census";
label variable baseage "Age at which cohort first observed";
label variable nat "Size of US cohort";
label variable nathsdo "Size of US HS dropout cohort";
label variable dmigperc "Change in net share of cohort migrated between census intervals";
label variable lgdprat16 "Log ratio of GDPs in year cohort turned 16";
label variable lgdpratdchg "change in log ratio of GDPs between census intervals";
label variable sob "Size of Mexican state birth cohort as of census";
label variable yrs16sq "Years since cohort turned 16 squared";
label variable yrs16dm "Years since cohort turned 16, demeaned";
label variable migperc "Net migration rate of cohort as of census year";
label variable sobbase "Size of Mexican state birth cohort when first observed";
label variable agdec "Age, in decades";
label variable cohortid "Unique identifier for birth cohort";
label variable lsobbase "Log of sobbase";

/*State, cohort, and year fixed effects: xi creates the _I* dummy sets used in the regressions.*/
xi i.statemx i.birthyr i.birthag i.year i.age i.agdec i.sex i.baseage;
generate birthyrsq = birthyrag^2;

/*Removing cohorts younger than 16 so the whole sample is 16-50:*/
drop if age<16;

/*Bringing in the new data on border enforcement effort and apprehensions,
  matched on sex, age, census year and state:*/
merge m:1 sex age year statemx using "$dropbox_dir\effort_index";

/*Predicted apprehensions are in numbers of people, so they are converted to rates using the
  base cohort size; effort_sample flags rows where that rate is available.*/
generate app_effort_rate = effort_index/sobbase;
generate effort_sample = (app_effort_rate!=.);

/*Getting rid of 2015 since the net migration figures for that year do not appear reliable:*/
drop if year==2015;

/*STOP is not a Stata command: it halts the do-file here on purpose, so the regression
  section that follows has to be run separately.*/
STOP;
#delimit;

/*TABLE 4:  ANALYSIS OF NET MIGRATION AT THE MEXICAN STATE LEVEL:*/
#delimit;

/*Every column uses the same IV specification: dmigperc on the GDP terms and the state, year,
  sex and ten-year-cohort dummies, with lnathsdorat instrumented by lnatrat, weighted by sob
  and clustered on cohortid. Only the estimation sample changes across columns.*/
local rhs "lgdprat16  lgdpinnov   _Istatemx*   _Iyear* _Isex*  _Ibirthag*  (lnathsdorat = lnatrat)";
local shown "lnathsdorat lgdprat16  lgdpinnov";
local fmt "nocons  nolabel bdec(4)";

/*Column 1: full sample (starts a new output file).*/
ivreg dmigperc `rhs' [pweight=sob] , cluster(cohortid);
outreg2 `shown'   using "$temp/mex0", ctitle("IVall") `fmt'  replace;

/*Column 2: census years before 2010.*/
ivreg dmigperc `rhs' if year<2010 [pweight=sob] , cluster(cohortid);
outreg2 `shown'   using "$temp/mex0", ctitle("IVno2010") `fmt'  append;

/*Column 3: sex==1 only.*/
ivreg dmigperc `rhs' if sex==1 [pweight=sob] , cluster(cohortid);
outreg2 `shown'   using "$temp/mex0", ctitle("IVmen") `fmt'  append;

/*Column 4: sex==2 only.*/
ivreg dmigperc `rhs' if sex==2 [pweight=sob] , cluster(cohortid);
outreg2 `shown'   using "$temp/mex0", ctitle("IVwomen") `fmt'  append;



/************************************************************************************************************************************************/
/*Producing Tables 5 and 6 of this paper.*/

#delimit;
clear all;
set memory 250m;
set more off;


/*Observed data (1980-2015) stacked with the 2020-2050 file, plus US GDP by year.*/
use "1980-2015.dta", clear;
append using "2020-2050";

merge m:1 year using "US_gdp_updated_redux.dta";
drop _merge;

/*birth holds five-year brackets written as "1901-1905", ..., "2046-2050".
  birth_cohort is the middle year of the bracket (first year + 2); any other value of
  birth leaves birth_cohort missing.*/
generate birth_cohort = .;
forvalues first = 1901(5)2046 {;
	local last = `first' + 4;
	replace birth_cohort = `first' + 2 if birth=="`first'-`last'";
	};

rename birth birth_bracket;

/*Age of the cohort midpoint in each year.*/
generate age = year - birth_cohort;

/*Rows without a population figure are not used.*/
drop if pop==.;

/*cohort_id: country x birth cohort x sex; cohort_group_id: birth cohort x sex (pooled over countries).*/
egen cohort_id = group(country birth_cohort sex);
egen cohort_group_id = group(birth_cohort sex);

/*Migrant stock as a percentage of the cohort's base population.*/
generate migperc = 100 * perwt/base_pop;

/*Change in migperc since the cohort's previous year, and the lag of that change.*/
bysort cohort_id (year): generate dmigperc = migperc - migperc[_n-1] if _n>1;
by cohort_id: generate ldmigperc = dmigperc[_n-1] if _n>1;

/*making migration rates decadal (the 2015 change is doubled; ldmigperc above uses the undoubled value);*/
replace dmigperc = dmigperc*2 if year==2015;

/*Country names without spaces.*/
replace country = "ElSalvador" if country=="El Salvador";
replace country = "DominicanRepublic" if country=="Dominican Republic";
replace country = "USA" if country=="United States of America";

rename perwt migrants;
rename base_pop births;

/*dest<var>: the USA value of births, gdp and gdp17 for the same birth cohort and sex,
  copied to every country's row.*/
foreach v in births gdp gdp17 {;
	generate `v'1 = `v' if country=="USA";
	egen dest`v' = mean(`v'1), by(birth_cohort sex);
	drop `v'1;
	};



/*Origin-to-USA ratios of births and GDP, in levels and logs.*/
generate female = (sex=="female");
generate brat = births/destbirths;
generate gdprat = gdp/destgdp;
generate gdp17rat = gdp17/destgdp17;
generate lbrat = ln(brat);
generate lgdprat = ln(gdprat);
generate lgdp17rat = ln(gdp17rat);

/*creating the cross-cohort changes in lbrat:
  reduce to one row per cohort, difference lbrat against the previous birth cohort of the
  same country and sex, save the result to "temp", then merge it back onto the full data.*/
preserve;
collapse lbrat  female birth_cohort (first) country, by(cohort_id);
bysort country female (birth_cohort): generate dlbrat = lbrat - lbrat[_n-1] if _n>1;
drop lbrat;
save "temp", replace;
restore;
merge m:1 country female birth_cohort using "temp";

/*The USA (destination) and Cuba rows are removed before countries are numbered.*/
drop if inlist(country, "USA", "Cuba");

/*Numeric country code used in the interaction loops and as i.cnum.*/
encode country, gen(cnum);

/*young: cohort aged 40 or less.*/
generate young = (age<=40);

/*Split time terms: year1 carries the year up to 1990 and is 0 from 2000 on;
  year2 is 0 up to 1990 and carries the year from 2000 on.*/
generate year1 = cond(year<=1990, year, cond(year>=2000 & year!=., 0, .));
generate year2 = cond(year<=1990, 0, cond(year>=2000 & year!=., year, .));

/*Period dummies and a trend measured from 1980.*/
generate yeardum1 = (year<=1990);
generate yeardum2 = (year>=2000);
generate yr = year-1980;
generate yrsq = yr^2;

/*Interactions of each term with the young indicator: young_<term>.*/
foreach term in year1 year2 yeardum1 yeardum2 yr yrsq lgdprat lgdp17rat lbrat female {;
	generate young_`term' = young * `term';
	};
	
/*Country-specific terms for country codes 2 through 25 (code 1 is left as the reference).
  Each variable equals the named quantity on rows of country x and 0 on all other rows.*/
forvalues x = 2/25 {;
	/*country x young dummy*/
	generate cnumyy_young_`x' = (cnum==`x' & young==1);
	/*country-specific lbrat slope*/
	generate cnumx_lbrat_`x' = cond(cnum==`x', lbrat, 0);
	/*country-specific linear trend, and the same for young cohorts*/
	generate cnumz_year_`x' = cond(cnum==`x', yr, 0);
	generate young_cnumz_year_`x' = young * cnumz_year_`x';
	/*country-specific quadratic trend*/
	generate cnums_yearsq_`x' = cond(cnum==`x', yrsq, 0);
	/*country-specific split time terms, and the same for young cohorts*/
	generate cnumzz_year1_`x' = cond(cnum==`x', year1, 0);
	generate young_cnumzz_year1_`x' = young * cnumzz_year1_`x';
	generate cnumzz_year2_`x' = cond(cnum==`x', year2, 0);
	generate young_cnumzz_year2_`x' = young * cnumzz_year2_`x';
	/*country x (year>=2000) dummy, and the same for young cohorts*/
	generate cyeardum2_`x' = (cnum==`x' & yeardum2==1);
	generate young_cyeardum2_`x' = young * cyeardum2_`x';
	/*country-specific log GDP ratio slope*/
	generate cnumgg_gdp_`x' = cond(cnum==`x', lgdprat, 0);
/*
	foreach y in cnumz_year cnums_yearsq cnumzz_year1 cnumzz_year2 cyeardum1 cyeardum2 {;
		gen `y'_young_`x' = `y'_`x' * young;
		};
*/
	};
/*Age-specific terms for ages 17, 22, ..., 77.
  Each variable equals the named quantity on rows of that age and 0 on all other rows.*/
forvalues x = 17(5)77 {;
	generate agex_`x'_lbrat = cond(age==`x', lbrat, 0);
	generate ageyearx_`x' = cond(age==`x', yr, 0);
	generate ageyearsqx_`x' = cond(age==`x', yrsq, 0);
	generate agezz_year1_`x' = cond(age==`x', year1, 0);
	generate agezz_year2_`x' = cond(age==`x', year2, 0);
	generate agegg_gdp_`x' = cond(age==`x', lgdprat, 0);
	};

/*Estimation sample: cohorts aged 70 or less. The star-commented lines are alternative restrictions left disabled.*/
*keep if age>=15;
drop if age>70;
*keep if age<=40;
*keep if (country=="Mexico" | country=="Guatemala" | country=="Honduras" |  country=="ElSalvador");

/*TABLE 5:  RESULTS OF THE PREDICTION REGRESSION:*/
/*migperc on the birth and GDP ratios (each also interacted with young), sex, age and country
  dummies, and the country-specific young, post-2000 and split-trend terms; weighted by births
  and clustered on birth cohort x sex.*/
local core "lbrat young_lbrat lgdprat young_lgdprat female young_female i.age i.cnum";
local ctry "cnumyy_young_*  cyeardum2* young_cyeardum2* cnumzz_year1* young_cnumzz_year1* cnumzz_year2*  young_cnumzz_year2*";
regress migperc `core'  `ctry' [aw = births] , cluster(cohort_group_id);
outreg2   using "mex0", ctitle("prediction eq") nocons  nolabel bdec(4)  replace;

/*Fitted values from the regression above.*/
predict migperc_hat;

/*calculating actual numbers of migrants based on the predictions:
  observed rates are used through 2015 and fitted rates afterwards.*/
replace migperc_hat = migperc if year<=2015;
generate number_hat = (migperc_hat/100) * births;


/*dropping 2050:*/
keep if year!=2050;


/*STOP is not a Stata command: it halts the do-file here on purpose, so the Table 6 section
  below has to be run separately.*/
STOP;
/*TABLE 6: THOUSANDS OF FOREIGN-BORN INDIVIDUALS RESIDENT IN THE US, BY YEAR:*/
/*Predicted foreign-born migrant counts by country, for 1980, 2015 and 2040, split into
  cohorts aged 40 or less (suffix 0) and cohorts older than 40 (suffix 1):*/
#delimit;
drop if inlist(country, "Cuba", "Venezuela");
keep if inlist(year, 1980, 2015, 2040);
*gen young = (age<40);
generate old = (age>40 & age!=.);

/*Counts in thousands, summed within country x year x age group.*/
replace number_hat = number_hat/1000;
collapse (sum) number_hat, by(country year old);
sort number_hat;

/*First spread the years across columns, then the two age groups, giving one row per country.*/
reshape wide number_hat, i(country old) j(year);
reshape wide number_hat*, i(country) j(old);
order country number_hat19800 number_hat19801 number_hat20150 number_hat20151 number_hat20400 number_hat20401;


